load packages

#install.packages("elevatr")
#remotes::install_github("wmgeolab/rgeoboundaries")
#load packages
library(vcfR)
library(ggplot2)
library(adegenet)
library(SNPfiltR)
library(StAMPP)
library(viridis)
library(rgeoboundaries)
library(elevatr)
library(raster)
library(sf)
library(ggpubr)

Read in data

#check out the details on the filtered vcf
vcfR <- read.vcfR("~/Desktop/mex.towhees/towhee.75.mac2.nomito.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 4800
##   header_line: 4801
##   variant count: 5004
##   column count: 41
## 
Meta line 1000 read in.
Meta line 2000 read in.
Meta line 3000 read in.
Meta line 4000 read in.
Meta line 4800 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 5004
##   Character matrix gt cols: 41
##   skip: 0
##   nrows: 5004
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant: 5004
## All variants processed
vcfR
## ***** Object of Class vcfR *****
## 32 samples
## 2668 CHROMs
## 5,004 variants
## Object size: 7.6 Mb
## 2.643 percent missing data
## *****        *****         *****
#read in sample info csv
sample.info<-read.csv("~/Desktop/mex.towhees/sample.locs.csv")

#make sure sampling file matches vcf
sample.info$ID %in% colnames(vcfR@gt)[-1]
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
colnames(vcfR@gt)[-1] %in% sample.info$ID
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
#if needed, subset sampling file to include only samples in vcf
#sample.info<-sample.info[sample.info$id %in% colnames(vcfR@gt)[-1],]

#reorder sampling file to match order of samples in vcf
sample.info<-sample.info[match(colnames(vcfR@gt)[-1], sample.info$ID),]
sample.info$ID == colnames(vcfR@gt)[-1]
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
sample.info$sample.loc<-as.character(sample.info$sample.loc)

#add mitochondrial haplotype info to dataframe
sample.info$haplotype<-c(rep("maculatus", times=6),rep("ocai", times=4),
                         rep("maculatus", times=18),rep("ocai", times=3),"maculatus")

Make color coded sampling map (pretty one for Fig. 1 and empty one for SNAPP figure)

#make dataframe with only the 8 unique sampling locs
unq<-sample.info[!duplicated(sample.info$lat),]

#download elevation data for Mexico
mex_bound <- geoboundaries("Mexico")
elevation_data <- get_elev_raster(locations = mex_bound, z = 6, clip = "locations")
## Registered S3 method overwritten by 'httr':
##   method           from  
##   print.cache_info hoardr
## Mosaicing & Projecting
## Clipping DEM to locations
## Note: Elevation units are in meters.
elevation_data <- as.data.frame(elevation_data, xy = TRUE)
colnames(elevation_data)[3] <- "elevation"
# remove rows of data frame with one or more NA's,using complete.cases
elevation_data <- elevation_data[complete.cases(elevation_data), ]

#make subset plotting area corresponding to our sampling
#subset elevation data based on latitude and longitude limits
elevation_data<-elevation_data[elevation_data$x >= -105.5 & elevation_data$x <= -96 &
                              elevation_data$y >= 16 & elevation_data$y <= 23,]

#subset mexico map outline based on limits
mex_bound <- st_crop(mex_bound, xmin = -105.5, xmax = -96,
                                    ymin = 16, ymax = 23)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
#make custom color-set based on towhee habitat
elevation_data$towhee.hab<-elevation_data$elevation

#color-code inhospitable habitat versus hospitable
elevation_data$towhee.hab<-ifelse(elevation_data$towhee.hab < 1676.4, "unsuitable","marginal")
table(elevation_data$towhee.hab)
## 
##   marginal unsuitable 
##     188945     239253
#color code good versus marginal habitat
good.hab <- elevation_data$elevation > 2133.6
table(replace(elevation_data$towhee.hab, good.hab, "suitable"))
## 
##   marginal   suitable unsuitable 
##     104234      84711     239253
elevation_data$towhee.hab<-replace(elevation_data$towhee.hab, good.hab, "suitable")

#plot the map with ggplot
ggplot() +
  geom_raster(data = elevation_data, aes(x = x, y = y, fill = elevation)) +
  geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
  geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=FALSE) +
  scale_color_brewer(palette = "RdGy")+
  geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
  #scale_fill_gradient(low = "white", high = "black")+
  scale_fill_gradientn(colours = terrain.colors(50))+
  labs(x = "Longitude", y = "Latitude", fill = "Elevation\n(meters)")+
  theme_classic()+
  theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
        legend.background = element_blank())

#plot the map with ggplot
ggplot() +
  geom_raster(data = elevation_data, aes(x = x, y = y, fill = towhee.hab)) +
  scale_fill_discrete(type =c("gray","black","white"))+
  geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
  geom_point(data = unq, aes(x = long, y = lat, color="red"), size=3, show.legend=FALSE) +
  #scale_color_brewer(palette = "RdGy")+
  #geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 5, color="white")+
  labs(x = "Longitude", y = "Latitude", fill = "Elevation")+
  theme_classic()+
  theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
        legend.background = element_blank())

elev.map<-ggplot() +
  geom_raster(data = elevation_data, aes(x = x, y = y, fill = towhee.hab)) +
  scale_fill_discrete(type =c("gray","black","white"))+
  geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
  geom_point(data = unq, aes(x = long, y = lat, color="red"), size=3, show.legend=FALSE) +
  #scale_color_brewer(palette = "RdGy")+
  #geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 5, color="white")+
  labs(x = "Longitude", y = "Latitude", fill = "Elevation")+
  theme_classic()+
  theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
        legend.background = element_blank())

ggsave(plot = elev.map, filename = "~/Desktop/mex.towhees/sibley.elev.map.pdf", width=8.5, height=5.5)

#plot the map with ggplot
ggplot() +
  geom_raster(data = elevation_data, aes(x = x, y = y, fill = elevation)) +
  geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
  geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=FALSE) +
  scale_color_brewer(palette = "PuOr")+
  geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
  scale_fill_gradient(low = "white", high = "black")+
  #scale_fill_gradientn(colours = terrain.colors(50))+
  labs(x = "Longitude", y = "Latitude", fill = "Elevation\n(meters)")+
  theme_classic()+
  theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
        legend.background = element_blank())

ggplot() +
  geom_raster(data = elevation_data, aes(x = x, y = y, fill = elevation)) +
  geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
  geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=FALSE) +
  scale_color_brewer(palette = "BrBG")+
  geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
  scale_fill_gradient(low = "white", high = "black")+
  #scale_fill_gradientn(colours = terrain.colors(50))+
  labs(x = "Longitude", y = "Latitude", fill = "Elevation\n(meters)")+
  theme_classic()+
  theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
        legend.background = element_blank())

#make empty map for SNAPP figure
pac<-map_data("world")
#
ggplot()+
  geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="seashell", col="black", cex=.3)+
  coord_sf(xlim = c(-105.5, -96), ylim = c(16, 23)) + 
  geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=TRUE) +
  scale_color_brewer(palette = "RdGy")+
  theme_classic()+
  geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
  theme(legend.position = "none")+
  xlab("Longitude")+
  ylab("Latitude")

ggplot()+
  geom_polygon(data = pac, aes(x=long, y = lat, group = group), fill="oldlace", col="black", cex=.3)+
  coord_sf(xlim = c(-105.5, -96), ylim = c(16, 23)) + 
  geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=TRUE) +
  scale_color_brewer(palette = "RdGy")+
  theme_classic()+
  geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
  theme(legend.position = "none")+
  xlab("Longitude")+
  ylab("Latitude")

Make PCA for both morphology and genome, each colored by sampling locality matching map

#morph
morph.pca <- prcomp(sample.info[,c(8:13)], center = TRUE,scale. = TRUE)
sample.info$pc1morph<-morph.pca$x[,1]
sample.info$pc2morph<-morph.pca$x[,2]
ggplot(sample.info, aes(x=pc1morph, y=pc2morph, color=sample.loc)) +
  geom_point(cex=4, alpha=.8)+
  theme_classic()+
  scale_color_brewer(palette = "RdGy")

#plumage
morph.pca <- prcomp(sample.info[,c(16:21)], center = TRUE,scale. = TRUE)
sample.info$pc1plum<-morph.pca$x[,1]
sample.info$pc2plum<-morph.pca$x[,2]
ggplot(sample.info, aes(x=pc1plum, y=pc2plum, color=sample.loc)) +
  geom_point(cex=4, alpha=.8)+
  theme_classic()+
  scale_color_brewer(palette = "RdGy")

summary(morph.pca)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     2.1211 0.8234 0.6099 0.48872 0.37894 0.26199
## Proportion of Variance 0.7498 0.1130 0.0620 0.03981 0.02393 0.01144
## Cumulative Proportion  0.7498 0.8628 0.9248 0.96463 0.98856 1.00000
#plot genetic pca
popm<-data.frame(id=sample.info$ID, pop=sample.info$sample.loc)
gen.pc<-assess_missing_data_pca(vcfR, popmap = popm, clustering = FALSE)

#add genetic PC1 & PC2 to sample info dataframe
sample.info$genpc1<-gen.pc$PC1
sample.info$genpc2<-gen.pc$PC2

#plot genetic PCA
ggplot(sample.info, aes(x=genpc1, y=genpc2, color=sample.loc, shape=haplotype)) +
  geom_point(cex=4, alpha=.8)+
  theme_classic()+
  scale_color_brewer(palette = "RdGy")

#plot genetic PC1 against plumage PC1
ggplot(sample.info, aes(x=genpc1, y=pc1plum, color=sample.loc)) +
  geom_point(cex=4, alpha=.8)+
  theme_classic()+
  scale_color_brewer(palette = "RdGy")

#plot genetic PC1 against morph PC1
ggplot(sample.info, aes(x=genpc1, y=pc1morph, color=sample.loc)) +
  geom_point(cex=4, alpha=.8)+
  theme_classic()+
  #geom_smooth(method=lm)+
  scale_color_brewer(palette = "RdGy")

###make Splitstree

#convert to genlight
gen<-vcfR2genlight(vcfR)

pop(gen)<-gen@ind.names
sample.div.mac.75 <- stamppNeisD(gen, pop = FALSE)
#export for splitstree
#stamppPhylip(distance.mat=sample.div.mac.75, file="~/Desktop/mex.towhees/towhee.75.mac.splits.txt")

#75% completeness cutoff with MAC splitstree
knitr::include_graphics(c("/Users/devder/Desktop/mex.towhees/towhee.75.mac.nomito.splitstree.png"))

#add dots color coded by locality in photoshop

Make pairwise Fst heatmap

#calc pairwise Fst
gen@pop<-as.factor(sample.info$sample.loc)
di.heat<-stamppFst(gen)
m<-di.heat$Fsts
#fill in upper triangle of matrix
m[upper.tri(m)] <- t(m)[upper.tri(m)]

#melt for plotting
heat <- reshape::melt(m)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
heat$X1<-as.character(heat$X1)
heat$X2<-as.character(heat$X2)

#plot with labels
ggplot(data = heat, aes(x=X1, y=X2, fill=value)) + 
  geom_tile()+
  geom_text(data=heat,aes(label=round(value, 2)))+
  theme_minimal()+
  scale_fill_gradient2(low = "white", high = "red", space = "Lab", name="Fst") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 8 rows containing missing values (geom_text).

#identify the number of fixed differences between pops
mat<-extract.gt(vcfR)
conv.mat<-mat
conv.mat[conv.mat == "0/0"]<-0
conv.mat[conv.mat == "0/1"]<-1
conv.mat[conv.mat == "1/1"]<-2
conv.mat<-as.data.frame(conv.mat)
#convert to numeric
for (i in 1:ncol(conv.mat)){
  conv.mat[,i]<-as.numeric(as.character(conv.mat[,i]))
}

#show colnames to verify you're subsetting correctly
colnames(conv.mat) == sample.info$ID
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
#make vector to fill with fixed diff values
f<-c()

#write for loop to calc number of fixed diffs between each pop
for (i in 1:nrow(heat)){
  #calc af of pop1 and pop2
  pop1.af<-(rowSums(conv.mat[,sample.info$sample.loc == heat$X1[i]], na.rm=T)/(rowSums(is.na(conv.mat[,sample.info$sample.loc == heat$X1[i]]) == FALSE)))/2
  pop2.af<-(rowSums(conv.mat[,sample.info$sample.loc == heat$X2[i]], na.rm=T)/(rowSums(is.na(conv.mat[,sample.info$sample.loc == heat$X2[i]]) == FALSE)))/2
  #store number of fixed differences
  f[i]<-sum(is.na(abs(pop1.af - pop2.af)) == FALSE & abs(pop1.af - pop2.af) == 1) #find fixed SNPs and add to vector
}

#add number of fixed diffs to df
heat$fixed<-f

#fix the assignments
heat$mixed<-heat$value
heat$mixed[c(9,12:15,17,18,20:24,25,29,33,41,44,45,47,49,52,53,57,58,60:63)]<-heat$fixed[c(9,12:15,17,18,20:24,25,29,33,41,44,45,47,49,52,53,57,58,60:63)]

#plot with labels
ggplot(data = heat, aes(x=X1, y=X2, fill=value)) + 
  geom_tile()+
  #scale_x_discrete(limits=levels(heat$X2)[c(1,7,2,3,4,5,6)], labels=c("1:10","12:18","26","11","24:25","19","20:23"))+ 
  #scale_y_discrete(limits=levels(heat$X2)[c(1,7,2,3,4,5,6)], labels=c("1:10","12:18","26","11","24:25","19","20:23"))+
  geom_text(data=heat,aes(label=round(mixed, 2)), size=2.5)+
  theme_minimal()+
  scale_fill_gradient2(low = "white", high = "red", space = "Lab", name="Fst") +
  theme(axis.text.x = element_text(vjust=.25, size=12),
        axis.text.y = element_text(hjust = -.2, size=12),
        axis.title.x = element_blank(), axis.title.y = element_blank())
## Warning: Removed 8 rows containing missing values (geom_text).

Bring in data and plot triangle plot with locality colors

tri.data<-read.csv("~/Desktop/mex.towhees/triangle.plot.df.csv")
tri.data$loc<-as.character(tri.data$loc)

ggplot(tri.data, aes(x=hi.index, y=heterozygosity, color=loc)) +
  geom_point(cex=6, alpha=.8)+
  theme_classic()+
  scale_color_brewer(palette = "RdGy")+
  geom_abline(intercept = 0, slope = 2)+
  geom_abline(intercept = 2, slope = -2)+
  ylim(c(0,1))+
  xlab("Hybrid Index")+
  ylab("Interspecific Heterozygosity")+
  theme(legend.position = "none")

make combined figure

#make figure that
#|_A_|_B_|
#|_C_|_D_|
#|_E_|_F_|
#|_G_|_H_|
#where ABCD = sampling map
#E = plumage PCA
#F = genomic PCA
#G = triangle plot
#H = pairwise Fst heatmap
#consider adding admixture plot in photoshop?
samp.map<-ggplot() +
  geom_raster(data = elevation_data, aes(x = x, y = y, fill = elevation)) +
  geom_sf(data = mex_bound, color = "black", fill = NA, cex=.3) +
  geom_point(data = unq, aes(x = long, y = lat, color=sample.loc), size=10, alpha =.8, show.legend=FALSE) +
  scale_color_brewer(palette = "RdGy")+
  geom_text(data = unq, aes(x = long, y = lat, label = sample.loc), size = 6, color="white")+
  #scale_fill_gradient(low = "white", high = "black")+
  scale_fill_gradientn(colours = terrain.colors(50))+
  labs(x = "Longitude", y = "Latitude", fill = "Elevation\n(meters)")+
  theme_classic()+
  theme(legend.position = c(0.01, 0.01), legend.justification = c(0.01, 0.01),
        legend.background = element_blank())

plum.pca<-ggplot(sample.info, aes(x=pc1plum, y=pc2plum, color=sample.loc)) +
  geom_point(cex=4, alpha=.8)+
  theme_classic()+
  scale_color_brewer(palette = "RdGy")+
  theme(legend.position = "none")+
  labs(x = "Plumage PC1,75% var. expl.", y = "Plumage PC2, 11.3% var. expl.")

gen.pca<-ggplot(sample.info, aes(x=genpc1, y=genpc2, color=sample.loc, shape=haplotype)) +
  geom_point(cex=6, alpha=.8)+
  theme_classic()+
  scale_color_brewer(palette = "RdGy")+
  theme(legend.position = "none")+
  labs(x = "Genomic PC1, 7.9% var. expl.", y = "Genomic PC2, 4.5% var. expl.")

fst.plot<-ggplot(data = heat, aes(x=X1, y=X2, fill=value)) + 
  geom_tile()+
  geom_text(data=heat,aes(label=round(mixed, 2)), size=2.5)+
  theme_minimal()+
  scale_fill_gradient2(low = "white", high = "red", space = "Lab", name="Fst") +
  theme(axis.text.x = element_text(vjust=.25, size=12),
        axis.text.y = element_text(hjust = -.2, size=12),
        axis.title.x = element_blank(), axis.title.y = element_blank())

tri.plot<-ggplot(tri.data, aes(x=hi.index, y=heterozygosity, color=loc)) +
  geom_point(cex=6, alpha=.8)+
  theme_classic()+
  scale_color_brewer(palette = "RdGy")+
  geom_abline(intercept = 0, slope = 2)+
  geom_abline(intercept = 2, slope = -2)+
  ylim(c(0,1))+
  xlab("Hybrid Index")+
  ylab("Interspecific Heterozygosity")+
  theme(legend.position = "none")

#save ggplots
#ggsave("~/Desktop/mex.towhees/elevation.samp.map.pdf", plot=samp.map, width = 6, height = 4.5, units = "in")
#ggsave("~/Desktop/mex.towhees/fst.plot.pdf", plot=fst.plot, width = 3.5, height = 2.6, units = "in")
#ggsave("~/Desktop/mex.towhees/plum.pca.pdf", plot=plum.pca, width = 4, height = 3.5, units = "in")
#ggsave("~/Desktop/mex.towhees/gen.pca.pdf", plot=gen.pca, width = 4, height = 3.5, units = "in")
#ggsave("~/Desktop/mex.towhees/tri.plot.pdf", plot=tri.plot, width = 4, height = 3.5, units = "in")